import matplotlib.pyplot as plt
import pandas as pd
# Use a font that can render the Chinese labels, then load the
# pre-processed travel-note spreadsheet.
plt.rcParams['font.sans-serif'] = 'SimHei'
df = pd.read_excel('旅游网站精华游记数据_预处理.xlsx')

# Number of rows in df for each value of the '月份' (month) column.
df_month = df.groupby('月份').size()

# Line chart of the per-month counts, with one tick per calendar month.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df_month.index, df_month, color=(0.894, 0, 0.498))
ax.set_xticks(range(1, 13))
ax.set_xlabel('月份')
ax.set_ylabel('旅游次数')
ax.set_title('每月游客旅游次数折线图')
# Annotate every data point with its count, centred on the point.
for month, count in df_month.items():
    ax.text(month, count, f'{int(count)}', ha='center')
plt.show()


# Two stacked histograms: distribution of trip length ('天数') and of
# per-person spending ('人均消费（元）').
# Both panels share the colour used by the other charts in this script; the
# first panel previously used (0.984, 0, 0.498), a transposed-digit variant.
hist_color = (0.894, 0, 0.498)
hist_panels = (
    # (column in df, x-axis label, panel title)
    ('天数', '天数', '按天数统计旅游次数直方图'),
    ('人均消费（元）', '人均消费/元', '按人均消费统计旅游次数直方图'),
)

plt.figure(figsize=(10, 9))
for position, (column, xlabel, title) in enumerate(hist_panels, start=1):
    plt.subplot(2, 1, position)
    plt.hist(df[column], color=hist_color, edgecolor='k')
    plt.xlabel(xlabel)
    plt.ylabel('旅游次数')
    plt.title(title)
plt.show()


# Split every non-missing '旅行标签' entry on whitespace and flatten the
# resulting lists into one list of individual tags.
tag_lists = df['旅行标签'].dropna().str.split(expand=False)
label_list = [tag for tags in tag_lists for tag in tags]

# Count how often each tag occurs and keep the five most frequent ones.
df_label = pd.DataFrame(label_list, columns=['标签'])
df_label['次数'] = 1
tag_counts = df_label.groupby('标签').agg('count')
df_label_count = tag_counts.sort_values(by='次数', ascending=False).head(5)

# Pie chart of the top-five tags, each wedge labelled with its percentage.
fig, ax = plt.subplots()
ax.pie(
    df_label_count['次数'],
    labels=df_label_count.index,
    autopct='%.2f%%',
)
ax.set_title('游客旅游方式饼状图')
plt.show()



# Turn every travel note that lists places into one row per place, then
# chart the ten places mentioned most often and the ten places with the
# largest summed view count.
df1 = df.dropna(subset=['途经地点']).reset_index(drop=True)

# Columns are addressed by position, as before: position 6 is split on ','
# into individual places and position 5 supplies the '阅览数' value
# (presumably the '途经地点' and view-count columns -- TODO confirm against
# the spreadsheet layout).
# Splitting the whole column and exploding it replaces the per-row
# pd.concat loop (quadratic) and the deprecated integer lookup on a
# label-indexed row.
df_concat = pd.DataFrame({
    '地点': df1.iloc[:, 6].str.split(','),
    '阅览数': df1.iloc[:, 5],
}).explode('地点', ignore_index=True)
df_concat['次数'] = 1

# Per place: summed view count and number of notes mentioning it.
df_group = df_concat.groupby('地点').agg('sum')
df_place = df_group.sort_values(by='次数', ascending=False).head(10)
df_view = df_group.sort_values(by='阅览数', ascending=False).head(10)

bar_color = (0.894, 0, 0.498)
bar_panels = (
    # (values indexed by place, y-axis label, panel title)
    (df_place['次数'], '旅游次数', '游记包含旅游地区次数前10名柱状图'),
    (df_view['阅览数'], '阅览数', '游记包含旅游地区阅览数前10名柱状图'),
)

plt.figure(figsize=(10, 8))
for position, (height, ylabel, title) in enumerate(bar_panels, start=1):
    plt.subplot(2, 1, position)
    # The place names come from the index of the ranked frame; the second
    # panel previously passed the tuple `df_view, index` here by mistake.
    plt.bar(height.index, height, width=0.6, color=bar_color)
    # Write each bar's value at its top, centred on the bar.
    for place, value in height.items():
        plt.text(place, value, f'{int(value)}', ha='center')
    plt.ylabel(ylabel)
    plt.title(title)
plt.show()












